# Clears work environment
rm(list = ls())

# Sets working directory
setwd('C:/Users/Jason/Box Sync/Home Folder jdt34/733 - Maximum Likelihood Estimation/Lai & Slater 2006 replication')

# Loads data and libraries
load(file='cleanedLSdata.RData')
load(file='newdata.RData')
load(file='panel1_data.RData')
loadPkg(c(packs, 'maps'))

# Calculates statistics mentioned in the panel
OStotalMIDs = sum(newdata$majorinitone)
OStotalCYs = length(which(newdata$majorinitone>0))
OSpcZeroOne = length(which(newdata$majorinitone<2)) / nrow(newdata)
newdata$nbresid = newdata$majorinitone - newdata$nb
newdata$ziresid = newdata$majorinitone - newdata$zi
OSnbrmse = sqrt(mean(newdata$nbresid^2))
OSzirmse = sqrt(mean(newdata$ziresid^2))

# Plots dependent variable over time
OSmidyear = by(newdata$majorinitone, factor(newdata$year), sum, simplify=F)
OSmidyear = do.call(rbind.data.frame, OSmidyear)
OSmidyear$year = as.numeric(rownames(OSmidyear))
colnames(OSmidyear)[1] = 'MIDs'
OSdvts = ggplot(data=OSmidyear, aes(x=year, y=MIDs)) + 
  geom_line(size=2, color='#F8766D') +
  scale_x_continuous(breaks=seq(1993, 2005, 2)) +
  xlab('') +
  ylab('') +
  ylim(0, 30)
ggsave(file='OSdvts.pdf', OSdvts, width=9, height=1.5, units='in')

# Plots dependent variable over space
OSmap.dat = map_data("world")
OSmap.dat$ccode = NA
for(i in 1:nrow(map3)) {
  OSmap.dat$ccode[which(OSmap.dat$region==map3[i,1])] = map3[i,2]
}
OSmap.dat$ccode = as.numeric(OSmap.dat$ccode)
OSmap.dat$mids = NA

OSallRegions = unique(OSmap.dat$region)
OSnoccode = unique(OSmap.dat$region[is.na(OSmap.dat$ccode)])
OSdiscardName = setdiff(seq(1, length(OSnoccode), 1), c(8:10, 17))
OSkeepName = setdiff(OSallRegions, OSnoccode[OSdiscardName])
OSmap.dat = OSmap.dat[which(OSmap.dat$region %in% OSkeepName),]

OSmidsum = by(newdata$majorinitone, factor(newdata$ccode), sum, simplify=F)
OSmidsum = do.call(rbind.data.frame, OSmidsum) 
OSmidsum$id = rownames(OSmidsum)
rownames(OSmidsum) = NULL
colnames(OSmidsum)[1] = 'MIDs'
OSmidsum[which(OSmidsum$MIDs==0), 1] = NA

for(i in 1:nrow(OSmidsum)) {
  OSmap.dat$mids[which(OSmap.dat$ccode==OSmidsum[i,2])] = OSmidsum[i,1]
}

OSdvmap = ggplot(OSmap.dat, aes(x=long, y=lat, group=group, fill=mids)) + 
  geom_polygon() + 
  scale_fill_gradient(low='#FFFFFF', high='#F8766D', limits=c(1, 20), breaks=c(1, seq(5, 20, 5))) +
  theme(panel.grid.minor.x=element_blank(), 
        panel.grid.minor.y=element_blank(),
        panel.grid.major.x=element_blank(), 
        panel.grid.major.y=element_blank(),
        axis.text.x=element_blank(),
        axis.text.y=element_blank(),
        axis.title.x=element_blank(),
        axis.title.y=element_blank(),
        axis.ticks.x=element_blank(),
        axis.ticks.y=element_blank(),
        legend.key.width=unit(4, 'cm'),
        legend.position='top',
        legend.title=element_blank())
ggsave(file='OSdvmap.pdf', OSdvmap, width=9, height=5, units='in')

save(file='panel8_out-of-sample_data_and_conclusions.RData', OStotalMIDs, OStotalCYs, OSpcZeroOne, 
     OSnbrmse, OSzirmse, OSmidyear, OSdvts, OSmap.dat, OSmidsum, OSdvmap, newdata)